#clean up
rm(list=ls())

#set seed
set.seed(715130425)

#load libraries
library(akima)
library(geoR)
library(scatterplot3d)  
library(ggmap)
library(lattice)
library(foreign)
library(maps)
library(spdep)
#install.packages('/Volumes/rgdal/rgdal_0.9-1.tgz',repos=NULL)
library(rgdal)
#install.packages('/Users/jamie/Downloads/prevR_2.9.tgz',repos=NULL)
#library(prevR)
library(car)
library(xtable)
library(maptools)

#options
options(scipen=8)

#Set to working directory. Adjust the command below to reflect YOUR working directory.
#setwd('/Volumes/MONOGAN/gillAreal/ccesCode/')
#setwd('/Users/monogan/Documents/gillAreal/ccesCode/')
#setwd('/Users/jamie/Desktop/nhgis0003_shape/nhgis0003_shapefile_us_tract_1970/')

###LOAD THE 2010 CENSUS DATA###
#census<-read.csv('block2010.csv')
#census<-read.csv('/Users/jamie/Documents/BIGdata/census2010clean/block2010.csv')
#census<-read.csv('/Users/monogan/Documents/BIGdata/census2010clean/block2010.csv')
census<-read.csv('block2010.csv')
#census<-read.csv('block2010.csv',quote="",row.names=NULL,stringsAsFactors=FALSE)
#dim(census)
#summary(census)

###MAP THE MOVED DATA###
sub.AK<-subset(census,subset=STATE=="Alaska")
sub.HI<-subset(census,subset=STATE=="Hawaii")
sub.48<-subset(census,subset=STATE!="Hawaii" & STATE!="Alaska")
#png("AKHImovedBlocks.png",width=960,height=480)
##postscript("AKHImovedBlocks.ps",width=6,height=3)
plot(y=sub.48$blockNorth,x=sub.48$blockEast,pch=".",xlab="Eastings",ylab="Northings",xlim=c(-5482,2255),ylim=c(-1331,1633))
points(y=sub.AK$blockNorth,x=sub.AK$blockEast,pch=".",col="red")
points(y=sub.HI$blockNorth,x=sub.HI$blockEast,pch=".",col="blue")
#dev.off()

###REVISE THE 2010 CENSUS DATA###
census$age.sex<-apply(census[,c("H76007","H76008","H76009","H76010","H76011","H76012","H76013","H76014",
		"H76015","H76016","H76017","H76018","H76019","H76020","H76021","H76022","H76023","H76024","H76025",
		"H76031","H76032","H76033","H76034","H76035","H76036","H76037",
		"H76038","H76039","H76040","H76041","H76042","H76043","H76044","H76045","H76046","H76047","H76048","H76049")],1,sum)
census<-subset(census,subset=!STATE%in%c("223","368") & age.sex>0)#Alaska and Hawaii are included, but they've been moved.
census$COUNTYPOP10<-as.numeric(gsub(",","",census$COUNTYPOP10))
#obs<-table(census$STATE); obs
#table(census$age.sex==0)

#indices for state and legislative district
census$lower<-paste(census$STATE,census$SLDLA)
census$upper<-paste(census$STATE,census$SLDUA)

#save variable classes, change factors to characters
var.class<-rep(NA,200)
for(i in 1:200){
	if(class(census[,i])=="factor"){census[,i]<-as.character(census[,i])}
	var.class[i]<-class(census[,i])
	}

###SAMPLE FROM THE CENSUS DATA###
#cover every census tract
#length(table(census$TRACTA))#23764
#names(table(census$TRACTA))
sp.tract<-split(census,census$TRACTA)
samples.tract<-lapply(sp.tract,function(x) x[sample(1:nrow(x),1,TRUE,prob=x$TOTALBLOCK),])
tract.draws<-as.data.frame(t(sapply(samples.tract,unlist)))
for(i in 1:199){
	tract.draws[,i]<-as.character(tract.draws[,i])
	tract.draws[,i]<-as(tract.draws[,i],var.class[i])
	}
rm(sp.tract,samples.tract)

#cover every state house lower chamber district
#length(table(census$lower))#4722
#names(table(census$lower))
sp.lower<-split(census,census$lower)
samples.lower<-lapply(sp.lower,function(x) x[sample(1:nrow(x),1,TRUE,prob=x$TOTALBLOCK),])
lower.draws<-as.data.frame(t(sapply(samples.lower,unlist)))
for(i in 1:199){
	lower.draws[,i]<-as.character(lower.draws[,i])
	lower.draws[,i]<-as(lower.draws[,i],var.class[i])
	}
rm(sp.lower,samples.lower)

#cover every state house upper chamber district
#length(table(census$upper))#1942
#names(table(census$upper))
sp.upper<-split(census,census$upper)
samples.upper<-lapply(sp.upper,function(x) x[sample(1:nrow(x),1,TRUE,prob=x$TOTALBLOCK),])
upper.draws<-as.data.frame(t(sapply(samples.upper,unlist)))
for(i in 1:199){
	upper.draws[,i]<-as.character(upper.draws[,i])
	upper.draws[,i]<-as(upper.draws[,i],var.class[i])
	}
rm(sp.upper,samples.upper)
	
#draw from blocks in proportion to population
N<-250000
draws<-census[sample(x=nrow(census),size=N,replace=T,prob=census$TOTALBLOCK),]
#plot(x=draws$blockEast,draws$blockNorth,pch=".")

#combine everything
draws<-rbind(tract.draws,lower.draws,upper.draws,draws) 
#dim(draws)
#summary(draws)
#test<-rbind(tract.draws[1,],lower.draws[1,],upper.draws[1,],draws[1,],census[1,])
#test


#jitter coordinates
N<-dim(draws)[1]
angle<-runif(N,0,2*pi)
length<-runif(N,0,draws$blockRadius)
draws$eastings<-draws$blockEast+length*cos(angle)
draws$northings<-draws$blockNorth+length*sin(angle)
#points(x=draws$eastings,draws$northings,pch=".",col="blue")

#identifiers, rural, and eastings/northings are set
#draws[1,c(1:5,8:24,183,197:198)]

###draw covariates for each kriged point###
#initialize outputs
religion.D<-matrix(NA,nrow=N,ncol=8)
race<-rep(NA,N)
ownership<-rep(NA,N)
female<-rep(NA,N)
age<-rep(NA,N)
empstat<-rep(NA,N)
educ<-rep(NA,N)
inc14<-rep(NA,N)

#special inputs for simplicity
m.emp<-draws[,105:143]
f.emp<-draws[,144:182]
m.educ<-draws[,92:97]
f.educ<-draws[,98:103]

for(i in 1:N){
#for(i in 1:2){
#i<-1	
	#draw predictors that come from marginal distributions (given location) only
	religion.D[i,]<-rmultinom(1,1,draws[i,c("other","cathadh","ldsadh","orthadh","jewish","mslmadh","mprtadh","evanadh")])
	race[i]<-which.max(rmultinom(1,1,draws[i,c("WHITE","BLACK","OTHER")]))
	ownership[i]<-ifelse(draws$TOTHOUSEBLOCK[i]>0,rbinom(n=1,size=1,prob=draws$HOMEOWN[i]/draws$TOTHOUSEBLOCK[i]),rbinom(n=1,size=1,prob=0.651))
	
	#family income
	#Source for relative proportions within income brackets and for missing block-level data, New York Times (accessed 17 December 2015): http://www.nytimes.com/interactive/2012/01/15/business/one-percent-map.html?_r=0
	inc.init<-ifelse(sum(draws[i,c("JPNE002","JPNE003","JPNE004","JPNE005","JPNE006","JPNE007008","JPNE009010","JPNE011","JPNE012","JPNE013","JPNE014","JPNE015","JPNE016017")])>1,
		which.max(rmultinom(1,1,draws[i,c("JPNE002","JPNE003","JPNE004","JPNE005","JPNE006","JPNE007008","JPNE009010","JPNE011","JPNE012","JPNE013","JPNE014","JPNE015","JPNE016017")])),
		which.max(rmultinom(1,1,c(.07,.06,.05,.06,.05,.10,.10,.08,.10,.12,.08,.04,.09))))
	inc14[i]<-recode(inc.init,'1=1;2=2;3=3;4=4;5=5;6=6;7=7;8=8;9:11=NA;12=13;13=14')
	if(inc.init==9){inc14[i]<-9+rbinom(1,1,0.3)
		}else if(inc.init==10){inc14[i]<-10+rbinom(1,1,0.75)
			}else if(inc.init==11){inc14[i]<-12+rbinom(1,1,0.25)
				}
	
	#draw sex and age from joint distribution
	twodraw<-which.max(rmultinom(1,1,draws[i,c("H76007","H76008","H76009","H76010","H76011","H76012","H76013","H76014",
		"H76015","H76016","H76017","H76018","H76019","H76020","H76021","H76022","H76023","H76024","H76025",
		"H76031","H76032","H76033","H76034","H76035","H76036","H76037",
		"H76038","H76039","H76040","H76041","H76042","H76043","H76044","H76045","H76046","H76047","H76048","H76049")]));twodraw
	female[i]<-as.numeric(twodraw>19);twodraw;female[i]
	
	#age; assume the maximum is 98, which is what we see in the CCES
	twodraw.B<-twodraw
	twodraw.B[twodraw.B>19]<-twodraw.B-19
	if(twodraw.B==1){age[i]<-which.max(rmultinom(1,1,prob=c(138,122)))+17
		}else if(twodraw.B==2){age[i]<-20 
			}else if(twodraw.B==3){age[i]<-21 
				}else if(twodraw.B==4){age[i]<-which.max(rmultinom(1,1,prob=c(125,142,170)))+21
					}else if(twodraw.B==5){age[i]<-which.max(rmultinom(1,1,prob=c(198,221,240,263,225)))+24
						}else if(twodraw.B==6){age[i]<-which.max(rmultinom(1,1,prob=c(287,288,279,325,313)))+29
							}else if(twodraw.B==7){age[i]<-which.max(rmultinom(1,1,prob=c(282,269,321,371,317)))+34
								}else if(twodraw.B==8){age[i]<-which.max(rmultinom(1,1,prob=c(378,404,402,447,481)))+39
									}else if(twodraw.B==9){age[i]<-which.max(rmultinom(1,1,prob=c(525,532,581,552,580)))+44
										}else if(twodraw.B==10){age[i]<-which.max(rmultinom(1,1,prob=c(524,626,624,641,611)))+49
											}else if(twodraw.B==11){age[i]<-which.max(rmultinom(1,1,prob=c(503,472,459,462,442)))+54
												}else if(twodraw.B==12){age[i]<-which.max(rmultinom(1,1,prob=c(485,512)))+59
													}else if(twodraw.B==13){age[i]<-which.max(rmultinom(1,1,prob=c(449,375,340)))+61
														}else if(twodraw.B==14){age[i]<-which.max(rmultinom(1,1,prob=c(550,557)))+64
															}else if(twodraw.B==15){age[i]<-which.max(rmultinom(1,1,prob=c(476,440,338)))+66
																}else if(twodraw.B==16){age[i]<-which.max(rmultinom(1,1,prob=c(333,275,266,218,194)))+69
																	}else if(twodraw.B==17){age[i]<-which.max(rmultinom(1,1,prob=c(169,144,141,112,111)))+74
																		}else if(twodraw.B==18){age[i]<-which.max(rmultinom(1,1,prob=c(67,59,44,28,25)))+79
																			}else {age[i]<-which.max(rmultinom(1,1,prob=c(15,17,12,5,5,4,2,2,1,1,1,1,1,1)))+84}	

	#employment status by sex and age
	emp.age<-recode(twodraw.B,'1=1;2:3=2;4=3;5=4;6=5;7:8=6;9:10=7;11=8;12=9;13=10;14:15=11;16=12;17:19=13')
	if(female[i]==0){
		emp.probs<-m.emp[i,c((3*emp.age),(3*emp.age-2),(3*emp.age-1))]
		} else{
			emp.probs<-f.emp[i,c((3*emp.age),(3*emp.age-2),(3*emp.age-1))]
		}
	empstat[i]<-ifelse(sum(emp.probs)>0,which.max(rmultinom(1,1,prob=emp.probs)),which.max(rmultinom(1,1,prob=emp.probs+1)))
		
	#education by sex
	if(female[i]==0){
		educ.probs<-m.educ[i,]
		} else{
			educ.probs<-f.educ[i,]
		}
	educ[i]<-ifelse(sum(educ.probs)>0,which.max(rmultinom(1,1,prob=educ.probs)),which.max(rmultinom(1,1,prob=educ.probs+1)))

}		

#format and combine all draws together
colnames(religion.D)<-c('other','cath','mormon','ortho','jewish','islam','main','evan')
kriges<-cbind(draws[,c(1:5,8:24,183,201:202)],race,ownership,female,age,educ,inc14,empstat,religion.D)

#write out file
plot(x=kriges$eastings,y=kriges$northings,pch=".") #check graph
write.csv(kriges,"krigedPointsFine.csv",row.names=F)



###CREATE A SUBSET OF THE KRIGES JUST FOR TRAINING###
#kriges<-read.csv("krigedPointsFine.csv")
subset<-100
kriges.1<-subset(kriges,subset=STATE!="Alaska" & STATE!="Hawaii") #Eliminate AK and HI from developing function
choose<-round(seq(1,dim(kriges.1)[1],length=subset)) #Number of observations from eastings and then from northings.
kriges.1<-kriges.1[order(kriges.1$eastings),] #sort on eastings
kriges.2a<-kriges.1[choose,] #save "subset" values across range of eastings
kriges.1<-kriges.1[order(kriges.1$northings),] #sort on northings
kriges.2b<-kriges.1[choose,] #save "subset" values across range of northings
kriges.2<-rbind(kriges.2a,kriges.2b) #combine easting and northing draws
dim(kriges.2);length(table(kriges.2$northings));length(table(kriges.2$eastings)) #check spread
plot(x=kriges.2$eastings,y=kriges.2$northings)#,pch=".") #check graph
#write.csv(kriges.2,"subsetKriges.csv",row.names=F)


#testing subset. 
choose.2<-round(seq(10,dim(kriges.1)[1]-10,length=subset)) #Number of observations from eastings and then from northings.
kriges.1<-kriges.1[order(kriges.1$eastings),] #sort on eastings
kriges.3a<-kriges.1[choose.2,] #save "subset" values across range of eastings
kriges.1<-kriges.1[order(kriges.1$northings),] #sort on northings
kriges.3b<-kriges.1[choose,] #save "subset" values across range of northings
kriges.3<-rbind(kriges.3a,kriges.3b) #combine easting and northing draws
plot(x=kriges.3$eastings,y=kriges.3$northings)#,pch=".") #check graph
write.csv(kriges.3,"subsetKriges.csv",row.names=F)


###CREATE A NY SUBSET OF KRIGES###
NY.kriges<-subset(kriges,subset=STATE=="New York")
plot(x=NY.kriges$eastings,y=NY.kriges$northings,pch=".") #check graph
write.csv(NY.kriges,"krigedPointsNY.csv",row.names=F)



